{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "389e4a78-19aa-4c3f-9b7a-92e81f088168",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# Create audiobooks using neural Text to speech"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "f320d6af-b255-4cb5-b60b-da840760713e",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "## Step 1: Load libraries and add service information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "ab422610-0438-4ca4-bd16-b45e90125294",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "from synapse.ml.core.platform import *\n",
    "\n",
    "if running_on_synapse():\n",
    "    from notebookutils import mssparkutils\n",
    "\n",
    "# Fill this in with your Azure AI service information\n",
    "service_key = find_secret(\n",
    "    secret_name=\"ai-services-api-key\", keyvault=\"mmlspark-build-keys\"\n",
    ")  # Replace this line with a string like service_key = \"dddjnbdkw9329\"\n",
    "service_loc = \"eastus\"\n",
    "\n",
    "storage_container = \"audiobooks\"\n",
    "storage_key = find_secret(\n",
    "    secret_name=\"madtest-storage-key\", keyvault=\"mmlspark-build-keys\"\n",
    ")\n",
    "storage_account = \"anomalydetectiontest\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "10c83d0e-998f-4d72-a351-4ffab15f662c",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "## Step 2: Attach the storage account to hold the audio files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "55b83038-e907-4101-a914-0a32825a9d03",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "spark_key_setting = f\"fs.azure.account.key.{storage_account}.blob.core.windows.net\"\n",
    "spark.sparkContext._jsc.hadoopConfiguration().set(spark_key_setting, storage_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "625c7b1d-4034-4df2-b919-3775ac9c271c",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from os.path import exists, join\n",
    "\n",
    "mount_path = f\"wasbs://{storage_container}@{storage_account}.blob.core.windows.net/\"\n",
    "if running_on_synapse():\n",
    "    mount_dir = join(\"/synfs\", mssparkutils.env.getJobId(), storage_container)\n",
    "    if not exists(mount_dir):\n",
    "        mssparkutils.fs.mount(\n",
    "            mount_path, f\"/{storage_container}\", {\"accountKey\": storage_key}\n",
    "        )\n",
    "elif running_on_databricks():\n",
    "    if not exists(f\"/dbfs/mnt/{storage_container}\"):\n",
    "        dbutils.fs.mount(\n",
    "            source=mount_path,\n",
    "            mount_point=f\"/mnt/{storage_container}\",\n",
    "            extra_configs={spark_key_setting: storage_key},\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "381c3af7-e0e8-4a29-ae88-467e86a0e717",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "## Step 3: Read in text data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "56c8ebab-567f-4c1d-a2ea-1aeb5aefcf1e",
     "showTitle": false,
     "title": ""
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from pyspark.sql.functions import udf\n",
    "\n",
    "\n",
    "@udf\n",
    "def make_audio_filename(part):\n",
    "    return f\"wasbs://{storage_container}@{storage_account}.blob.core.windows.net/alice_in_wonderland/part_{part}.wav\"\n",
    "\n",
    "\n",
    "df = (\n",
    "    spark.read.parquet(\n",
    "        \"wasbs://publicwasb@mmlspark.blob.core.windows.net/alice_in_wonderland.parquet\"\n",
    "    )\n",
    "    .repartition(10)\n",
    "    .withColumn(\"filename\", make_audio_filename(\"part\"))\n",
    ")\n",
    "\n",
    "display(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "9fcb4305-a6d4-4f48-ac6f-cf4f863c7f5f",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "## Step 4: Synthesize audio from text\n",
    "\n",
    "<div>\n",
    "<img src=\"https://mmlspark.blob.core.windows.net/graphics/NeuralTTS_hero.jpeg\" width=\"500\" />\n",
    "</div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "2730c8cd-616a-4258-909d-912ea66d6446",
     "showTitle": false,
     "title": ""
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from synapse.ml.services.speech import TextToSpeech\n",
    "\n",
    "tts = (\n",
    "    TextToSpeech()\n",
    "    .setSubscriptionKey(service_key)\n",
    "    .setTextCol(\"text\")\n",
    "    .setLocation(service_loc)\n",
    "    .setErrorCol(\"error\")\n",
    "    .setVoiceName(\"en-US-SteffanNeural\")\n",
    "    .setOutputFileCol(\"filename\")\n",
    ")\n",
    "\n",
    "audio = tts.transform(df).cache()\n",
    "display(audio)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "157a368a-d80b-4bf8-a5cb-c1f266be2f00",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "## Step 5: Listen to an audio file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "7a0ad60f-5511-42ba-9882-e93f474f85e9",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "from IPython.display import Audio\n",
    "\n",
    "\n",
    "def get_audio_file(num):\n",
    "    if running_on_databricks():\n",
    "        return f\"/dbfs/mnt/{storage_container}/alice_in_wonderland/part_{num}.wav\"\n",
    "    else:\n",
    "        return join(mount_dir, f\"alice_in_wonderland/part_{num}.wav\")\n",
    "\n",
    "\n",
    "Audio(filename=get_audio_file(1))"
   ]
  }
 ],
 "metadata": {
  "kernel_info": {
   "name": "synapse_pyspark"
  },
  "kernelspec": {
   "display_name": "Synapse PySpark",
   "language": "Python",
   "name": "synapse_pyspark"
  },
  "language_info": {
   "name": "python"
  },
  "save_output": true,
  "synapse_widget": {
   "state": {},
   "version": "0.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
